import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
print('modules are imported')
modules are imported
# Country-level COVID-19 time series published as a CSV on GitHub.
dataset_url = 'https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv'
# Download the CSV and parse it into a DataFrame.
df = pd.read_csv(dataset_url)
# Preview the first rows.
df.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 0 | 0 | 0 |
| 1 | 2020-01-23 | Afghanistan | 0 | 0 | 0 |
| 2 | 2020-01-24 | Afghanistan | 0 | 0 | 0 |
| 3 | 2020-01-25 | Afghanistan | 0 | 0 | 0 |
| 4 | 2020-01-26 | Afghanistan | 0 | 0 | 0 |
df.tail()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 161563 | 2022-04-12 | Zimbabwe | 247094 | 0 | 5460 |
| 161564 | 2022-04-13 | Zimbabwe | 247160 | 0 | 5460 |
| 161565 | 2022-04-14 | Zimbabwe | 247208 | 0 | 5462 |
| 161566 | 2022-04-15 | Zimbabwe | 247237 | 0 | 5462 |
| 161567 | 2022-04-16 | Zimbabwe | 247237 | 0 | 5462 |
df.shape
(161568, 5)
# Keep only the rows in which at least one confirmed case has been reported.
df = df.loc[df['Confirmed'] > 0]
df
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 33 | 2020-02-24 | Afghanistan | 5 | 0 | 0 |
| 34 | 2020-02-25 | Afghanistan | 5 | 0 | 0 |
| 35 | 2020-02-26 | Afghanistan | 5 | 0 | 0 |
| 36 | 2020-02-27 | Afghanistan | 5 | 0 | 0 |
| 37 | 2020-02-28 | Afghanistan | 5 | 0 | 0 |
| ... | ... | ... | ... | ... | ... |
| 161563 | 2022-04-12 | Zimbabwe | 247094 | 0 | 5460 |
| 161564 | 2022-04-13 | Zimbabwe | 247160 | 0 | 5460 |
| 161565 | 2022-04-14 | Zimbabwe | 247208 | 0 | 5462 |
| 161566 | 2022-04-15 | Zimbabwe | 247237 | 0 | 5462 |
| 161567 | 2022-04-16 | Zimbabwe | 247237 | 0 | 5462 |
148455 rows × 5 columns
# Display the rows that belong to Italy.
df.loc[df['Country'] == 'Italy']
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 70185 | 2020-01-31 | Italy | 2 | 0 | 0 |
| 70186 | 2020-02-01 | Italy | 2 | 0 | 0 |
| 70187 | 2020-02-02 | Italy | 2 | 0 | 0 |
| 70188 | 2020-02-03 | Italy | 2 | 0 | 0 |
| 70189 | 2020-02-04 | Italy | 2 | 0 | 0 |
| ... | ... | ... | ... | ... | ... |
| 70987 | 2022-04-12 | Italy | 15404809 | 0 | 161032 |
| 70988 | 2022-04-13 | Italy | 15467395 | 0 | 161187 |
| 70989 | 2022-04-14 | Italy | 15533012 | 0 | 161336 |
| 70990 | 2022-04-15 | Italy | 15595302 | 0 | 161469 |
| 70991 | 2022-04-16 | Italy | 15659835 | 0 | 161602 |
807 rows × 5 columns
# World map of the 'Confirmed' column, animated with one frame per 'Date' value.
fig = px.choropleth(
    df,
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    animation_frame='Date',
)
fig.update_layout(title_text='Global Spread of COVID-19 ')
fig.show()

# Same map coloured by the 'Deaths' column.
fig = px.choropleth(
    df,
    locations='Country',
    locationmode='country names',
    color='Deaths',
    animation_frame='Date',
)
# The title names the plotted metric so this figure is not mistaken for the
# confirmed-cases map above (both previously carried the same title).
fig.update_layout(title_text='Global COVID-19 Deaths')
fig.show()
# Rows that belong to China only.
df_china = df.loc[df['Country'] == 'China']
df_china.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 30192 | 2020-01-22 | China | 548 | 28 | 17 |
| 30193 | 2020-01-23 | China | 643 | 30 | 18 |
| 30194 | 2020-01-24 | China | 920 | 36 | 26 |
| 30195 | 2020-01-25 | China | 1406 | 39 | 42 |
| 30196 | 2020-01-26 | China | 2075 | 49 | 56 |
let's select the columns that we need
# Keep only the columns needed for the infection-rate analysis.
# .copy() makes df_china an independent frame, so adding columns to it later
# is not a chained assignment on a slice of the original DataFrame.
df_china = df_china[['Date', 'Confirmed']].copy()
df_china
| Date | Confirmed | |
|---|---|---|
| 30192 | 2020-01-22 | 548 |
| 30193 | 2020-01-23 | 643 |
| 30194 | 2020-01-24 | 920 |
| 30195 | 2020-01-25 | 1406 |
| 30196 | 2020-01-26 | 2075 |
| ... | ... | ... |
| 31003 | 2022-04-12 | 1655477 |
| 31004 | 2022-04-13 | 1681437 |
| 31005 | 2022-04-14 | 1705231 |
| 31006 | 2022-04-15 | 1759128 |
| 31007 | 2022-04-16 | 1760211 |
816 rows × 2 columns
calculating the first derivative (day-over-day difference) of the Confirmed column
# Day-over-day change in confirmed cases; the first row has no predecessor
# and is therefore NaN.
df_china['Infection Rate'] = df_china.Confirmed.diff()
df_china.head()
| Date | Confirmed | Infection Rate | |
|---|---|---|---|
| 30192 | 2020-01-22 | 548 | NaN |
| 30193 | 2020-01-23 | 643 | 95.0 |
| 30194 | 2020-01-24 | 920 | 277.0 |
| 30195 | 2020-01-25 | 1406 | 486.0 |
| 30196 | 2020-01-26 | 2075 | 669.0 |
# Line chart of the confirmed count together with its day-over-day change.
px.line(
    df_china,
    x='Date',
    y=['Confirmed', 'Infection Rate'],
)
# Largest single-day increase recorded for China.
df_china.loc[:, 'Infection Rate'].max()
77402.0
df.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 33 | 2020-02-24 | Afghanistan | 5 | 0 | 0 |
| 34 | 2020-02-25 | Afghanistan | 5 | 0 | 0 |
| 35 | 2020-02-26 | Afghanistan | 5 | 0 | 0 |
| 36 | 2020-02-27 | Afghanistan | 5 | 0 | 0 |
| 37 | 2020-02-28 | Afghanistan | 5 | 0 | 0 |
# Largest day-over-day increase in confirmed cases for every country.
# A single groupby replaces re-filtering the whole frame once per country;
# sort=False keeps countries in order of first appearance, matching
# df['Country'].unique().
_max_rate_by_country = (
    df.groupby('Country', sort=False)['Confirmed']
    .apply(lambda confirmed: confirmed.diff().max())
)
Countries = list(_max_rate_by_country.index)
max_Infection_Rate = list(_max_rate_by_country)

# One row per country, same column names as before.
df_MIR = pd.DataFrame({
    'Country': Countries,
    'Max Infection Rate': max_Infection_Rate,
})
df_MIR.head()
| Country | Max Infection Rate | |
|---|---|---|
| 0 | Afghanistan | 3243.0 |
| 1 | Albania | 4789.0 |
| 2 | Algeria | 2521.0 |
| 3 | Andorra | 2313.0 |
| 4 | Angola | 5035.0 |
# Bar chart of each country's maximum infection rate on a logarithmic y-axis.
px.bar(
    df_MIR,
    x='Country',
    y='Max Infection Rate',
    color='Country',
    title='Global Max Infection Rate',
    log_y=True,
)
On 9 March 2020, the government of Italy under Prime Minister Giuseppe Conte imposed a national quarantine, restricting the movement of the population except for necessity, work, and health circumstances, in response to the growing pandemic of COVID-19 in the country. source
# Date on which Italy's national quarantine was imposed (9 March 2020).
italy_lockdown_start_date = '2020-03-09'
# One calendar month after the lockdown start date.
italy_lockdown_a_month_later = '2020-04-09'
df.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 33 | 2020-02-24 | Afghanistan | 5 | 0 | 0 |
| 34 | 2020-02-25 | Afghanistan | 5 | 0 | 0 |
| 35 | 2020-02-26 | Afghanistan | 5 | 0 | 0 |
| 36 | 2020-02-27 | Afghanistan | 5 | 0 | 0 |
| 37 | 2020-02-28 | Afghanistan | 5 | 0 | 0 |
let's get data related to italy
# Rows for Italy only. .copy() makes df_italy an independent DataFrame, so the
# columns added to it afterwards do not raise SettingWithCopyWarning.
df_italy = df[df.Country == 'Italy'].copy()
let's check the dataframe
df_italy
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 70185 | 2020-01-31 | Italy | 2 | 0 | 0 |
| 70186 | 2020-02-01 | Italy | 2 | 0 | 0 |
| 70187 | 2020-02-02 | Italy | 2 | 0 | 0 |
| 70188 | 2020-02-03 | Italy | 2 | 0 | 0 |
| 70189 | 2020-02-04 | Italy | 2 | 0 | 0 |
| ... | ... | ... | ... | ... | ... |
| 70987 | 2022-04-12 | Italy | 15404809 | 0 | 161032 |
| 70988 | 2022-04-13 | Italy | 15467395 | 0 | 161187 |
| 70989 | 2022-04-14 | Italy | 15533012 | 0 | 161336 |
| 70990 | 2022-04-15 | Italy | 15595302 | 0 | 161469 |
| 70991 | 2022-04-16 | Italy | 15659835 | 0 | 161602 |
807 rows × 5 columns
let's calculate the infection rate in Italy
# Day-over-day change in Italy's confirmed cases (first row is NaN).
df_italy['Infection Rate'] = df_italy['Confirmed'].diff()
df_italy.head()
C:\Users\ASUS\AppData\Local\Temp/ipykernel_13748/2684023470.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | |
|---|---|---|---|---|---|---|
| 70185 | 2020-01-31 | Italy | 2 | 0 | 0 | NaN |
| 70186 | 2020-02-01 | Italy | 2 | 0 | 0 | 0.0 |
| 70187 | 2020-02-02 | Italy | 2 | 0 | 0 | 0.0 |
| 70188 | 2020-02-03 | Italy | 2 | 0 | 0 | 0.0 |
| 70189 | 2020-02-04 | Italy | 2 | 0 | 0 | 0.0 |
ok! now let's do the visualization
# Italy's infection rate over time, with vertical markers at the lockdown
# start date and one month later.
fig = px.line(
    df_italy,
    x='Date',
    y='Infection Rate',
    title='Before and After Lockdown in Italy ',
)

# Both marker lines run from 0 up to the peak of the plotted series.
_peak_infection_rate = df_italy['Infection Rate'].max()

# (date, line colour, annotation text, annotation height)
_lockdown_markers = [
    (italy_lockdown_start_date, 'red',
     "Starting date of the lockdown", _peak_infection_rate),
    (italy_lockdown_a_month_later, 'orange',
     "A month after the lockdown started", 0),
]
for _date, _color, _label, _label_y in _lockdown_markers:
    fig.add_shape(
        type="line",
        x0=_date,
        y0=0,
        x1=_date,
        y1=_peak_infection_rate,
        line=dict(color=_color, width=2),
    )
    fig.add_annotation(x=_date, y=_label_y, text=_label)

# Show the figure explicitly, as the other figures in this file do.
fig.show()
df_italy.head()
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | |
|---|---|---|---|---|---|---|
| 70185 | 2020-01-31 | Italy | 2 | 0 | 0 | NaN |
| 70186 | 2020-02-01 | Italy | 2 | 0 | 0 | 0.0 |
| 70187 | 2020-02-02 | Italy | 2 | 0 | 0 | 0.0 |
| 70188 | 2020-02-03 | Italy | 2 | 0 | 0 | 0.0 |
| 70189 | 2020-02-04 | Italy | 2 | 0 | 0 | 0.0 |
let's calculate the number of new deaths day by day
# Day-over-day change in Italy's death count (first row is NaN).
df_italy['Deaths Rate'] = df_italy['Deaths'].diff()
C:\Users\ASUS\AppData\Local\Temp/ipykernel_13748/3823060142.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
let's check the dataframe again
df_italy.head()
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | Deaths Rate | |
|---|---|---|---|---|---|---|---|
| 70185 | 2020-01-31 | Italy | 2 | 0 | 0 | NaN | NaN |
| 70186 | 2020-02-01 | Italy | 2 | 0 | 0 | 0.0 | 0.0 |
| 70187 | 2020-02-02 | Italy | 2 | 0 | 0 | 0.0 | 0.0 |
| 70188 | 2020-02-03 | Italy | 2 | 0 | 0 | 0.0 | 0.0 |
| 70189 | 2020-02-04 | Italy | 2 | 0 | 0 | 0.0 | 0.0 |
now let's plot a line chart to compare the impact of the COVID-19 national lockdown on the spread of the virus and on the daily number of deaths
# Plot both daily series on their raw scales.
fig = px.line(
    df_italy,
    x='Date',
    y=['Infection Rate', 'Deaths Rate'],
)
fig.show()
# Rescale the infection rate by its own maximum so its peak becomes 1.
df_italy['Infection Rate'] = df_italy['Infection Rate'].div(
    df_italy['Infection Rate'].max()
)
C:\Users\ASUS\AppData\Local\Temp/ipykernel_13748/23156089.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Rescale the deaths rate by its own maximum so its peak becomes 1.
df_italy['Deaths Rate'] = df_italy['Deaths Rate'].div(
    df_italy['Deaths Rate'].max()
)
C:\Users\ASUS\AppData\Local\Temp/ipykernel_13748/3670634413.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Infection rate and deaths rate for Italy on one chart, with vertical markers
# at the lockdown start date and one month later.
fig = px.line(
    df_italy,
    x='Date',
    y=['Infection Rate', 'Deaths Rate'],
)

# Both marker lines run from 0 up to the peak of the infection-rate series.
_peak_infection_rate = df_italy['Infection Rate'].max()

# (date, line colour, annotation text, annotation height)
_lockdown_markers = [
    (italy_lockdown_start_date, 'green',
     "Starting date of the lockdown", _peak_infection_rate),
    (italy_lockdown_a_month_later, 'orange',
     "A month after the lockdown started", 0),
]
for _date, _color, _label, _label_y in _lockdown_markers:
    fig.add_shape(
        type="line",
        x0=_date,
        y0=0,
        x1=_date,
        y1=_peak_infection_rate,
        line=dict(color=_color, width=2),
    )
    fig.add_annotation(x=_date, y=_label_y, text=_label)

# Show the figure explicitly, as the other figures in this file do.
fig.show()